# Data Loading and Preparation ----
# Read the two raw survey inputs: the 2020 responses from a CSV file and the
# 2025 responses from a serialized R object.
data_2020 <- read.csv(file = "2020.csv")
data_2025 <- readRDS(file = "crew_dataset_keys_analysis_ready.rds")
# Age group ordering: an open-ended lower bracket, five-year bands from 25-29
# through 60-64, and an open-ended upper bracket. Used as factor levels.
age_order <- local({
  band_start <- seq(25, 60, by = 5)
  c(
    "Under 25",
    paste0(band_start, "-", band_start + 4),
    "65 or older"
  )
})
# Standardize gender function
#
# Trims and lower-cases the raw answers, then maps them onto the reporting
# labels "Women", "Men" and "Nonbinary". Answers that match no rule are
# returned in their trimmed, lower-cased form.
#
# Args:
#   gender_col  Vector of raw gender responses.
# Returns: character vector of the same length as `gender_col`.
standardize_gender <- function(gender_col) {
  cleaned <- trimws(gender_col)
  cleaned <- tolower(cleaned)

  is_woman <- cleaned %in% c("female", "woman")
  is_man <- cleaned %in% c("male", "man")
  is_nonbinary <- grepl("nonbinary|non-binary|gender variant", cleaned)

  # The first matching rule wins; the final rule is the pass-through.
  case_when(
    is_woman ~ "Women",
    is_man ~ "Men",
    is_nonbinary ~ "Nonbinary",
    TRUE ~ as.character(cleaned)
  )
}
# Prepare 2020 data: derive the standardized `gender` label from m27 and the
# ordered `age` factor from m25, then keep only rows where both are usable.
df_2020 <- data_2020 %>%
  mutate(gender = standardize_gender(m27)) %>%
  mutate(age = factor(m25, levels = age_order)) %>%
  filter(
    !is.na(gender),
    !is.na(age),
    gender != "",
    age != "Decline to Answer"
  )

# Prepare 2025 data the same way (keep ALL columns for plotting)
df_2025 <- data_2025 %>%
  mutate(gender = standardize_gender(m27)) %>%
  mutate(age = factor(m25, levels = age_order)) %>%
  filter(
    !is.na(gender),
    !is.na(age),
    gender != "",
    age != "Decline to Answer"
  )
# Color palette keyed by the standardized gender labels
gender_colors <- c(Women = "#e74c3c", Men = "#3498db", Nonbinary = "#9b59b6")

# Report that loading finished and how many rows survived the filters
cat("Data loaded successfully!\n")
## Data loaded successfully!
cat(sprintf("2020 sample size: %d \n", nrow(df_2020)))
## 2020 sample size: 2587
cat(sprintf("2025 sample size: %d \n", nrow(df_2025)))
## 2025 sample size: 2412
# Weighting Functions and Themes ----
# Enhanced plotting theme for professional analysis
#
# theme_minimal() with centered bold titles, a bottom legend, light gray major
# grid lines, no minor grid, and white plot/panel backgrounds. Added to every
# plot in this report.
theme_compensation <- theme_minimal() +
  theme(
    plot.title = element_text(
      size = 16, face = "bold", hjust = 0.5, margin = margin(b = 20)
    ),
    plot.subtitle = element_text(
      size = 12, hjust = 0.5, color = "gray40", margin = margin(b = 15)
    ),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated for lines since ggplot2 3.4.0 and emits a warning.
    panel.grid.major.x = element_line(color = "gray90", linewidth = 0.3),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.3),
    strip.text = element_text(size = 11, face = "bold"),
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA)
  )
# Fill colors for the plots, keyed by the standardized gender labels
comp_colors <- setNames(
  c("#e74c3c", "#3498db", "#9b59b6"),
  c("Women", "Men", "Nonbinary")
)
# Helper function for formatting currency
#
# Formats dollar amounts in thousands with a "K" suffix, rounded to whole
# thousands (accuracy = 1 after scaling by 1e-3).
#
# Args:
#   x  Numeric vector of dollar amounts.
# Returns: character vector of formatted labels.
format_currency <- function(x) {
  as_thousands <- scales::dollar_format(
    scale = 1e-3,
    suffix = "K",
    accuracy = 1
  )
  as_thousands(x)
}
# WEIGHTING FUNCTION FOR CATEGORICAL DATA WITH GLOBAL PERCENTAGES
#
# For one categorical question, returns the percentage of each response option
# among men (unweighted) and among women (post-stratified by age).
# Women's within-age-bracket percentages are combined with the supplied age
# weights. The weights are renormalised over the age brackets that actually
# occur among women's answers, so the weighted percentages still sum to 100
# when a bracket has no respondents or no weight.
#
# Args:
#   data            Data frame with `gender`, `age` and the question column.
#   question_col    Name (single string) of the response column.
#   show_breakdown  If TRUE, print the men and women summaries.
#   age_weights     Data frame with columns `age` and `weight`. Defaults to
#                   the global `women_age_weights`, so existing calls behave
#                   as before.
#
# Returns: data frame with columns `response`, `gender`, `weighted_percent`.
#   The men-only `global_percent` (share of all non-missing responses) is
#   computed for the printed breakdown but is not part of the return value.
apply_women_weighting <- function(data, question_col, show_breakdown = FALSE,
                                  age_weights = women_age_weights) {
  stopifnot(
    is.character(question_col),
    length(question_col) == 1,
    question_col %in% names(data),
    all(c("age", "weight") %in% names(age_weights))
  )

  plot_data <- data %>%
    select(gender, age, response = all_of(question_col)) %>%
    filter(!is.na(response) & response != "")

  # Calculate total sample size for global percentages
  total_responses <- nrow(plot_data)

  # Men percentages (NO weighting - weight = 1.0)
  men_summary <- plot_data %>%
    filter(gender == "Men") %>%
    count(response) %>%
    mutate(
      percent = n / sum(n) * 100,
      global_percent = n / total_responses * 100,
      gender = "Men",
      weighted_percent = percent # No weighting for men
    )

  # Women percentages within each age bracket, with the bracket's weight
  # attached. Brackets without a weight cannot contribute and are dropped.
  women_by_age <- plot_data %>%
    filter(gender == "Women") %>%
    count(age, response) %>%
    group_by(age) %>%
    mutate(percent_in_age = n / sum(n) * 100) %>%
    ungroup() %>%
    left_join(age_weights, by = "age") %>%
    filter(!is.na(weight))

  # Total weight of the brackets that are present; dividing by it
  # renormalises the weights to sum to 1 over those brackets.
  present_weight <- women_by_age %>%
    distinct(age, weight) %>%
    pull(weight) %>%
    sum()

  women_weighted <- women_by_age %>%
    group_by(response) %>%
    summarise(
      weighted_percent = sum(percent_in_age * weight) / present_weight,
      .groups = "drop"
    ) %>%
    mutate(gender = "Women")

  # Combine results
  final_results <- bind_rows(
    men_summary %>% select(response, gender, weighted_percent),
    women_weighted %>% select(response, gender, weighted_percent)
  )

  if (show_breakdown) {
    cat("\n=== WEIGHTING BREAKDOWN ===\n")
    cat("Men (no weighting applied):\n")
    print(men_summary)
    cat("\nWomen (2020 age-weighted):\n")
    print(women_weighted)
  }

  final_results
}
# WEIGHTING FUNCTION FOR NUMERIC DATA (COMPENSATION/SCALES)
#
# Returns the mean and median of a numeric column for men (unweighted) and for
# women (age-weighted), overall or per level of an optional grouping column.
#
# Women's figures are weighted averages of the per-age-bracket means/medians.
# The weights are normalised within each output row (divided by the total
# weight of the brackets present in that group). Without this, a group that
# does not contain every age bracket would be scaled down by the missing
# weight; a group made of a single bracket would be multiplied by that
# bracket's weight alone.
#
# Args:
#   data         Data frame with `gender`, `age` and the referenced columns.
#   salary_col   Name (single string) of the numeric column. Character
#                columns are converted with as.numeric().
#   group_col    Optional name of a grouping column; NULL for overall figures.
#   age_weights  Data frame with columns `age` and `weight`. Defaults to the
#                global `women_age_weights`, so existing calls are unchanged.
#
# Returns: data frame with `mean_salary`, `median_salary`, `gender`, preceded
#   by `group` when `group_col` is given. Only values that are non-missing,
#   finite and greater than zero are used.
apply_compensation_weighting <- function(data, salary_col, group_col = NULL,
                                         age_weights = women_age_weights) {
  stopifnot(
    is.character(salary_col),
    length(salary_col) == 1,
    salary_col %in% names(data),
    is.null(group_col) || group_col %in% names(data),
    all(c("age", "weight") %in% names(age_weights))
  )

  # Convert salary column to numeric if it's character
  if (is.character(data[[salary_col]])) {
    data[[salary_col]] <- as.numeric(data[[salary_col]])
  }

  # Prepare data; rows with a missing or empty group are dropped when grouping
  if (is.null(group_col)) {
    group_vars <- character(0)
    plot_data <- data %>%
      select(gender, age, salary = all_of(salary_col))
  } else {
    group_vars <- "group"
    plot_data <- data %>%
      select(
        gender, age,
        salary = all_of(salary_col),
        group = all_of(group_col)
      ) %>%
      filter(!is.na(group) & group != "")
  }
  plot_data <- plot_data %>%
    filter(!is.na(salary) & salary > 0 & !is.infinite(salary))

  # Mean and median of `salary` for whatever grouping is active on `df`
  summarise_pay <- function(df) {
    df %>%
      summarise(
        mean_salary = mean(salary, na.rm = TRUE),
        median_salary = median(salary, na.rm = TRUE),
        .groups = "drop"
      )
  }

  # Men: plain statistics, overall or by group
  men_stats <- plot_data %>%
    filter(gender == "Men") %>%
    group_by(across(all_of(group_vars))) %>%
    summarise_pay() %>%
    mutate(gender = "Men")

  # Women: statistics per age bracket (and group), then a weighted average
  # across brackets using weights normalised within each output row.
  women_weighted <- plot_data %>%
    filter(gender == "Women") %>%
    group_by(across(all_of(c("age", group_vars)))) %>%
    summarise_pay() %>%
    left_join(age_weights, by = "age") %>%
    filter(!is.na(weight)) %>%
    group_by(across(all_of(group_vars))) %>%
    summarise(
      mean_salary = sum(mean_salary * weight) / sum(weight),
      median_salary = sum(median_salary * weight) / sum(weight),
      .groups = "drop"
    ) %>%
    mutate(gender = "Women")

  bind_rows(men_stats, women_weighted)
}
# WEIGHTING FUNCTION FOR TRUE/FALSE DATA
#
# Recodes a TRUE/FALSE question to "Yes"/"No" and delegates to
# apply_women_weighting(), returning whatever that function returns.
#
# Args:
#   data            Data frame with `gender`, `age` and the question column.
#   question_col    Name of the TRUE/FALSE column.
#   show_breakdown  Passed through to apply_women_weighting().
apply_boolean_weighting <- function(data, question_col, show_breakdown = FALSE) {
  answered <- data %>%
    select(gender, age, response = all_of(question_col)) %>%
    filter(!is.na(response))

  yes_no <- answered %>%
    mutate(response = ifelse(response, "Yes", "No")) %>%
    select(gender, age, response)

  # Use the categorical weighting function
  apply_women_weighting(yes_no, "response", show_breakdown)
}
# Announce which weighting helpers are now available
cat(
  c(
    "Weighting functions loaded successfully!\n",
    "✓ apply_women_weighting() - for categorical data with global percentages\n",
    "✓ apply_compensation_weighting() - for numeric compensation/scale data\n",
    "✓ apply_boolean_weighting() - for TRUE/FALSE data\n"
  ),
  sep = ""
)
## Weighting functions loaded successfully!
## ✓ apply_women_weighting() - for categorical data with global percentages
## ✓ apply_compensation_weighting() - for numeric compensation/scale data
## ✓ apply_boolean_weighting() - for TRUE/FALSE data
# Group 1: Compensation Analysis (Plots 1-13) ----
# Plot 1: Annual Base Salary by Gender ----
cat("Creating Plot 1: Annual Base Salary by Gender\n")
## Creating Plot 1: Annual Base Salary by Gender

# Mean base salary per gender (column m21A[1_SQ001]); women age-weighted
salary_data <- apply_compensation_weighting(df_2025, "m21A[1_SQ001]")

# Calculate gender pay gap: percent by which women's mean is below men's
women_salary <- salary_data$mean_salary[salary_data$gender == "Women"]
men_salary <- salary_data$mean_salary[salary_data$gender == "Men"]
salary_gap <- round((1 - women_salary / men_salary) * 100, 1)

# One bar per gender with the formatted mean printed above it
p1 <- ggplot(salary_data, aes(gender, mean_salary, fill = gender))
p1 <- p1 +
  geom_col(width = 0.6, alpha = 0.8) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    vjust = -0.5, size = 5, fontface = "bold"
  )
p1 <- p1 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  )
p1 <- p1 +
  labs(
    title = "2025 Annual Base Salary by Gender",
    subtitle = paste0("Women's salaries weighted by 2020 age distribution | Base salary gap: ", salary_gap, "%"),
    x = "Gender",
    y = "Mean Annual Base Salary",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p1 <- p1 + theme_compensation + guides(fill = "none")
print(p1)

# Plot 2: Commission Earnings by Gender ----
cat("Creating Plot 2: Commission Earnings by Gender\n")
## Creating Plot 2: Commission Earnings by Gender

# Mean commission per gender (column m21A[3_SQ001]); women age-weighted
commission_data <- apply_compensation_weighting(df_2025, "m21A[3_SQ001]")

# One bar per gender with the formatted mean printed above it
p2 <- ggplot(commission_data, aes(gender, mean_salary, fill = gender))
p2 <- p2 +
  geom_col(width = 0.6, alpha = 0.8) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    vjust = -0.5, size = 5, fontface = "bold"
  )
p2 <- p2 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  )
p2 <- p2 +
  labs(
    title = "2025 Commission Earnings by Gender",
    subtitle = "Women's earnings weighted by 2020 age distribution | Performance-based compensation",
    x = "Gender",
    y = "Mean Annual Commission",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p2 <- p2 + theme_compensation + guides(fill = "none")
print(p2)

# Plot 3: Bonus Earnings by Gender ----
cat("Creating Plot 3: Bonus Earnings by Gender\n")
## Creating Plot 3: Bonus Earnings by Gender

# Mean bonus per gender (column m21A[2_SQ001]); women age-weighted
bonus_data <- apply_compensation_weighting(df_2025, "m21A[2_SQ001]")

# One bar per gender with the formatted mean printed above it
p3 <- ggplot(bonus_data, aes(gender, mean_salary, fill = gender))
p3 <- p3 +
  geom_col(width = 0.6, alpha = 0.8) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    vjust = -0.5, size = 5, fontface = "bold"
  )
p3 <- p3 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  )
p3 <- p3 +
  labs(
    title = "2025 Short-Term Bonus Earnings by Gender",
    subtitle = "Women's bonuses weighted by 2020 age distribution | Incentive compensation analysis",
    x = "Gender",
    y = "Mean Annual Bonus",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p3 <- p3 + theme_compensation + guides(fill = "none")
print(p3)

# Plot 4: Total Annual Compensation ----
cat("Creating Plot 4: Total Annual Compensation\n")
## Creating Plot 4: Total Annual Compensation

# Mean total compensation per gender (column m28B); women age-weighted
total_comp_data <- apply_compensation_weighting(df_2025, "m28B")

# Calculate gender pay gap: percent by which women's mean is below men's
women_total <- total_comp_data$mean_salary[total_comp_data$gender == "Women"]
men_total <- total_comp_data$mean_salary[total_comp_data$gender == "Men"]
pay_gap <- round((1 - women_total / men_total) * 100, 1)

# One bar per gender with the formatted mean printed above it
p4 <- ggplot(total_comp_data, aes(gender, mean_salary, fill = gender))
p4 <- p4 +
  geom_col(width = 0.6, alpha = 0.8) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    vjust = -0.5, size = 5, fontface = "bold"
  )
p4 <- p4 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  )
p4 <- p4 +
  labs(
    title = "2025 Total Annual Compensation by Gender",
    subtitle = paste0("Women's compensation weighted by 2020 age distribution | Gender pay gap: ", pay_gap, "%"),
    x = "Gender",
    y = "Mean Total Compensation",
    caption = "Source: 2025 Survey Data | Includes base salary, bonus, and commission"
  )
p4 <- p4 + theme_compensation + guides(fill = "none")
print(p4)

# Plot 5: Compensation by Specialization ----
cat("Creating Plot 5: Compensation by Specialization\n")
## Creating Plot 5: Compensation by Specialization

# Mean total compensation (m28B) per specialization (m1B) and gender
spec_comp_data <- apply_compensation_weighting(df_2025, "m28B", "m1B")

# Pick the 8 specializations with the highest compensation, averaged over the
# gender rows. slice_max() replaces the superseded top_n(); like top_n() it
# keeps ties, so more than 8 can be returned when values tie at the cutoff.
top_specs <- spec_comp_data %>%
  group_by(group) %>%
  summarise(avg_salary = mean(mean_salary), .groups = "drop") %>%
  slice_max(avg_salary, n = 8) %>%
  pull(group)

# Keep only those specializations and wrap long names for the axis
spec_comp_filtered <- spec_comp_data %>%
  filter(group %in% top_specs) %>%
  mutate(group = str_wrap(group, 20))

# Horizontal dodged bars, ordered by compensation
p5 <- ggplot(
  spec_comp_filtered,
  aes(x = reorder(group, mean_salary), y = mean_salary, fill = gender)
) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    position = position_dodge(width = 0.7), hjust = -0.1, size = 3.5
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  ) +
  coord_flip() +
  labs(
    title = "2025 Total Compensation by Specialization and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Top 8 specializations shown",
    x = "Specialization",
    y = "Mean Total Compensation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.y = element_text(size = 9))
print(p5)

# Plot 6: Compensation by Position Level ----
cat("Creating Plot 6: Compensation by Position Level\n")
## Creating Plot 6: Compensation by Position Level

# Mean total compensation (m28B) per current position (m3A) and gender
position_comp_data <- apply_compensation_weighting(df_2025, "m28B", "m3A")

# Clean position names and filter out non-employed categories
# NOTE(review): if several raw m3A labels map to the same display label, this
# leaves more than one row per label/gender and the dodged bars are drawn on
# top of each other. Means cannot be merged here without the group sizes;
# confirm against the raw m3A levels.
position_comp_filtered <- position_comp_data %>%
  filter(!group %in% c("Unemployed", "Retired")) %>%
  mutate(
    group = as.character(group),
    group = case_when(
      str_detect(group, "C-Suite") ~ "C-Suite",
      str_detect(group, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
      str_detect(group, "Senior level") ~ "Senior Level",
      str_detect(group, "Mid-level|Associate") ~ "Mid-Level/Associate",
      str_detect(group, "Entry level") ~ "Entry Level",
      str_detect(group, "Self-employed|Independent") ~ "Self-Employed",
      TRUE ~ group
    )
  ) %>%
  filter(!is.na(group))

# Order positions hierarchically. Labels kept by the `TRUE ~ group` fallback
# are appended after the known order; with levels = position_order alone they
# would become NA and be plotted under an "NA" tick.
position_order <- c("Entry Level", "Mid-Level/Associate", "Senior Level",
                    "VP/SVP/MD/Partner", "C-Suite", "Self-Employed")
position_comp_filtered <- position_comp_filtered %>%
  mutate(group = factor(group, levels = union(position_order, unique(group))))

p6 <- ggplot(
  position_comp_filtered,
  aes(x = group, y = mean_salary, fill = gender)
) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    position = position_dodge(width = 0.7), vjust = -0.3, size = 3.5
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "2025 Total Compensation by Position Level and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Clear hierarchy shown",
    x = "Position Level",
    y = "Mean Total Compensation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
print(p6)

# Plot 7: Compensation by Age Groups ----
cat("Creating Plot 7: Compensation by Age Groups\n")
## Creating Plot 7: Compensation by Age Groups

# Mean total compensation (m28B) per age bracket (m25) and gender.
# NOTE(review): each group here is a single age bracket, so women's values
# depend on how apply_compensation_weighting() scales the age weights within
# a group - confirm the weights are normalised per group.
age_comp_data <- apply_compensation_weighting(df_2025, "m28B", "m25")

# Filter to age groups with data, then order the brackets
age_comp_filtered <- age_comp_data %>%
  filter(!is.na(group))
age_comp_filtered <- age_comp_filtered %>%
  mutate(group = factor(group, levels = age_order))

# Dodged bars per age bracket with the formatted mean above each bar
p7 <- ggplot(age_comp_filtered, aes(group, mean_salary, fill = gender))
p7 <- p7 +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    position = position_dodge(width = 0.7), vjust = -0.3, size = 3.5
  )
p7 <- p7 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  )
p7 <- p7 +
  labs(
    title = "2025 Total Compensation by Age Group and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Career progression visible",
    x = "Age Group",
    y = "Mean Total Compensation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p7 <- p7 +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p7)

# Plot 8: Compensation by Years Experience ----
cat("Creating Plot 8: Compensation by Years Experience\n")
## Creating Plot 8: Compensation by Years Experience
# Note: Based on inspection, m4A only has "Less than one year" and "Other"
# Tell the reader about the limited data before plotting
cat("Note: Years Experience data is limited to 'Less than one year' and 'Other' categories\n")
## Note: Years Experience data is limited to 'Less than one year' and 'Other' categories

# Mean total compensation (m28B) per experience category (m4A) and gender
exp_comp_data <- apply_compensation_weighting(df_2025, "m28B", "m4A")

# Dodged bars per experience category with the formatted mean above each bar
p8 <- ggplot(exp_comp_data, aes(group, mean_salary, fill = gender))
p8 <- p8 +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    position = position_dodge(width = 0.7), vjust = -0.3, size = 4
  )
p8 <- p8 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  )
p8 <- p8 +
  labs(
    title = "2025 Total Compensation by Years Experience and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Limited experience categories available",
    x = "Years of Experience",
    y = "Mean Total Compensation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Note: Limited experience data available"
  )
p8 <- p8 + theme_compensation
print(p8)

# Plot 9: Compensation by Company Size ----
cat("Creating Plot 9: Compensation by Company Size\n")
## Creating Plot 9: Compensation by Company Size

# Mean total compensation (m28B) per company revenue band (m29) and gender
company_comp_data <- apply_compensation_weighting(df_2025, "m28B", "m29")

# Clean company size categories: drop "Don't know" and shorten the band labels.
# Labels that match none of the rules are kept as they are.
company_comp_filtered <- company_comp_data %>%
  filter(!group %in% c("Don't know") & !is.na(group)) %>%
  mutate(
    group = as.character(group),
    group = case_when(
      group == "Less than $100,000" ~ "<$100K",
      group == "$100,000-$249,999" ~ "$100K-$250K",
      group == "$250,000-$499,999" ~ "$250K-$500K",
      group == "$500,000 - $999,999" ~ "$500K-$1M",
      group == "$1 Million - $4.9 Million" ~ "$1M-$5M",
      group == "$5 Million - $9.9 Million" ~ "$5M-$10M",
      group == "$10 Million - $19.9 Million" ~ "$10M-$20M",
      group == "$20 Million - $49.9 Million" ~ "$20M-$50M",
      group == "$50 Million - $99.9 Million" ~ "$50M-$100M",
      group == "$100 Million - $299 Million" ~ "$100M-$300M",
      group == "$300 Million - $499 Million" ~ "$300M-$500M",
      group == "$500 Million - $999 Million" ~ "$500M-$1B",
      group == "More than $1 Billion" ~ ">$1B",
      TRUE ~ group
    )
  )

# Order by company size. Labels that fell through the recoding above are
# appended after the known bands; with levels = size_order alone they would
# become NA and be plotted under an "NA" tick.
size_order <- c("<$100K", "$100K-$250K", "$250K-$500K", "$500K-$1M", "$1M-$5M",
                "$5M-$10M", "$10M-$20M", "$20M-$50M", "$50M-$100M", "$100M-$300M",
                "$300M-$500M", "$500M-$1B", ">$1B")
company_comp_filtered <- company_comp_filtered %>%
  mutate(group = factor(group, levels = union(size_order, unique(group))))

p9 <- ggplot(
  company_comp_filtered,
  aes(x = group, y = mean_salary, fill = gender)
) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = format_currency(mean_salary)),
    position = position_dodge(width = 0.7), vjust = -0.3, size = 3, angle = 90
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = format_currency,
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "2025 Total Compensation by Company Size and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Annual revenue categories",
    x = "Company Annual Revenue",
    y = "Mean Total Compensation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))
print(p9)

# Plot 10: Compensation Changes 2023-2024 ----
cat("Creating Plot 10: Compensation Changes 2023-2024\n")
## Creating Plot 10: Compensation Changes 2023-2024

# Percentage of each answer to m22A per gender; women age-weighted
comp_change_data <- apply_women_weighting(df_2025, "m22A")

# Clean up the change categories: rename the three known raw answers and
# leave any other answer untouched
comp_change_filtered <- comp_change_data %>%
  filter(!is.na(response))
comp_change_filtered <- comp_change_filtered %>%
  mutate(
    response = case_when(
      response == "Increase:" ~ "Increased",
      response == "Decrease:" ~ "Decreased",
      response == "Stayed the same" ~ "Stayed the Same",
      TRUE ~ response
    )
  )

# Dodged bars per answer with the rounded percentage above each bar
p10 <- ggplot(comp_change_filtered, aes(response, weighted_percent, fill = gender))
p10 <- p10 +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    vjust = -0.5, size = 4, fontface = "bold"
  )
p10 <- p10 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.15))
  )
p10 <- p10 +
  labs(
    title = "2025 Compensation Changes from 2023-2024 by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Global percentages shown",
    x = "Compensation Change Direction",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p10 <- p10 + theme_compensation
print(p10)

# Plot 11: Projected 2025 Compensation Changes ----
cat("Creating Plot 11: Projected 2025 Compensation Changes\n")
## Creating Plot 11: Projected 2025 Compensation Changes

# Percentage of each answer to m22C per gender; women age-weighted
proj_change_data <- apply_women_weighting(df_2025, "m22C")

# Clean up the projected change categories: rename the three known raw
# answers and leave any other answer untouched
proj_change_filtered <- proj_change_data %>%
  filter(!is.na(response))
proj_change_filtered <- proj_change_filtered %>%
  mutate(
    response = case_when(
      response == "Increase:" ~ "Expect Increase",
      response == "Decrease:" ~ "Expect Decrease",
      response == "Stay the same" ~ "Expect No Change",
      TRUE ~ response
    )
  )

# Dodged bars per answer with the rounded percentage above each bar
p11 <- ggplot(proj_change_filtered, aes(response, weighted_percent, fill = gender))
p11 <- p11 +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    vjust = -0.5, size = 4, fontface = "bold"
  )
p11 <- p11 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.15))
  )
p11 <- p11 +
  labs(
    title = "2025 Projected Compensation Changes by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Forward-looking expectations",
    x = "Expected Compensation Change",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p11 <- p11 + theme_compensation
print(p11)

# Plot 12: Commission Willingness ----
cat("Creating Plot 12: Commission Willingness\n")
## Creating Plot 12: Commission Willingness

# Percentage of each answer to m23B per gender; women age-weighted.
# This reassigns `commission_data`, which Plot 2 also assigns.
commission_data <- apply_women_weighting(df_2025, "m23B")
commission_filtered <- commission_data %>%
  filter(!is.na(response))

# Dodged bars per answer with the rounded percentage above each bar
p12 <- ggplot(commission_filtered, aes(response, weighted_percent, fill = gender))
p12 <- p12 +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    vjust = -0.5, size = 4, fontface = "bold"
  )
p12 <- p12 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.15))
  )
p12 <- p12 +
  labs(
    title = "2025 Willingness to Accept Commission-Based Positions by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Career path preferences",
    x = "Commission Willingness",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p12 <- p12 + theme_compensation
print(p12)

# Plot 13: Compensation Structure Role in Career ----
cat("Creating Plot 13: Compensation Structure Role in Career\n")
## Creating Plot 13: Compensation Structure Role in Career

# Percentage of each answer to m24 per gender; women age-weighted
comp_structure_data <- apply_women_weighting(df_2025, "m24")

# Clean up and wrap long text for compensation structure responses: known
# answers get a short two-line label, anything else is wrapped at 20 chars
comp_structure_filtered <- comp_structure_data %>%
  filter(!is.na(response))
comp_structure_filtered <- comp_structure_filtered %>%
  mutate(
    response = case_when(
      str_detect(response, "not part of the compensation structure") ~ "Commissions Not\nPart of Career",
      str_detect(response, "actively pursued") ~ "Actively Pursued\nCommission Career",
      str_detect(response, "does not play a significant role") ~ "Commission Role\nNot Significant",
      str_detect(response, "altered.*to avoid") ~ "Altered Career to\nAvoid Commission",
      response == "None of these" ~ "None of These",
      TRUE ~ str_wrap(response, 20)
    )
  )

# Horizontal dodged bars ordered by percentage
p13 <- ggplot(
  comp_structure_filtered,
  aes(reorder(response, weighted_percent), weighted_percent, fill = gender)
)
p13 <- p13 +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7), hjust = -0.1, size = 3.5
  )
p13 <- p13 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.15))
  ) +
  coord_flip()
p13 <- p13 +
  labs(
    title = "2025 Role of Compensation Structure in Career Path by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Commission-related career decisions",
    x = "Career Path Relationship to Commission",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p13 <- p13 +
  theme_compensation +
  theme(axis.text.y = element_text(size = 9))
print(p13)

# Group 2: Career Trajectory & Satisfaction (Plots 14-22) ----
# Plot 14: Career Aspirations by Gender ----
cat("Creating Plot 14: Career Aspirations by Gender\n")
## Creating Plot 14: Career Aspirations by Gender

# Percentage of each answer to m4C per gender; women age-weighted
career_asp_data <- apply_women_weighting(df_2025, "m4C")

# Clean up career aspiration categories. Several raw answers can map to the
# same display label, so the percentages are summed per label and gender
# afterwards; otherwise each label could have several overlapping bars.
career_asp_filtered <- career_asp_data %>%
  filter(!is.na(response)) %>%
  mutate(
    response = as.character(response),
    response = case_when(
      str_detect(response, "C-Suite") ~ "C-Suite",
      str_detect(response, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
      str_detect(response, "Senior level") ~ "Senior Level",
      str_detect(response, "Mid-level|Associate") ~ "Mid-Level/Associate",
      str_detect(response, "Entry level") ~ "Entry Level",
      str_detect(response, "Self-employed|Independent") ~ "Self-Employed",
      str_detect(response, "Not motivated") ~ "Not Title-Motivated",
      TRUE ~ response
    )
  ) %>%
  group_by(response, gender) %>%
  summarise(weighted_percent = sum(weighted_percent), .groups = "drop")

# Order aspirations hierarchically. Labels kept by the `TRUE ~ response`
# fallback are appended after the known order; with levels = aspiration_order
# alone they would become NA and be plotted under an "NA" tick.
aspiration_order <- c("Entry Level", "Mid-Level/Associate", "Senior Level",
                      "VP/SVP/MD/Partner", "C-Suite", "Self-Employed", "Not Title-Motivated")
career_asp_filtered <- career_asp_filtered %>%
  mutate(
    response = factor(response, levels = union(aspiration_order, unique(response)))
  )

p14 <- ggplot(
  career_asp_filtered,
  aes(x = response, y = weighted_percent, fill = gender)
) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    vjust = -0.3, size = 4, fontface = "bold"
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "2025 Career Aspirations by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Peak career level goals",
    x = "Aspired Career Level",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
print(p14)

# Plot 15: Career Aspirations by Age Group ----
cat("Creating Plot 15: Career Aspirations by Age Group\n")
## Creating Plot 15: Career Aspirations by Age Group

# Create age-career aspiration cross-tabulation with women's weighting.
# `age` is the prepared factor; `age_group` is re-derived from the raw m25.
career_age_data <- df_2025 %>%
  select(gender, age, career_asp = m4C, age_group = m25) %>%
  filter(
    !is.na(career_asp),
    !is.na(age_group),
    !is.na(gender),
    age_group != "Decline to Answer",
    career_asp != ""
  ) %>%
  mutate(
    # Collapse raw answers to five display labels; everything else is "Other"
    career_asp_clean = case_when(
      str_detect(career_asp, "C-Suite") ~ "C-Suite",
      str_detect(career_asp, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
      str_detect(career_asp, "Senior level") ~ "Senior Level",
      str_detect(career_asp, "Mid-level|Associate") ~ "Mid-Level/Associate",
      str_detect(career_asp, "Not motivated") ~ "Not Title-Motivated",
      TRUE ~ "Other"
    ),
    age_group = factor(age_group, levels = age_order)
  )

# Men: unweighted share of each aspiration within every age group
men_career_age <- career_age_data %>%
  filter(gender == "Men") %>%
  count(age_group, career_asp_clean) %>%
  group_by(age_group) %>%
  mutate(percent = n / sum(n) * 100) %>%
  ungroup() %>%
  mutate(gender = "Men", weighted_percent = percent)

# Women: share within each age bracket, combined across brackets with the
# weights in `women_age_weights` and reported as a single overall category
women_share_by_age <- career_age_data %>%
  filter(gender == "Women") %>%
  count(age, career_asp_clean) %>%
  group_by(age) %>%
  mutate(percent_in_age = n / sum(n) * 100) %>%
  ungroup()
women_career_age <- women_share_by_age %>%
  left_join(women_age_weights, by = "age") %>%
  mutate(weighted_contribution = percent_in_age * weight) %>%
  group_by(career_asp_clean) %>%
  summarise(
    weighted_percent = sum(weighted_contribution, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(gender = "Women", age_group = "Overall Women (Weighted)")

# Combine for plotting - focus on key aspirations and age groups
career_age_combined <- bind_rows(
  men_career_age %>% select(age_group, career_asp_clean, gender, weighted_percent),
  women_career_age %>% select(age_group, career_asp_clean, gender, weighted_percent)
)
career_age_filtered <- career_age_combined %>%
  filter(
    career_asp_clean %in% c("C-Suite", "VP/SVP/MD/Partner", "Senior Level", "Mid-Level/Associate"),
    age_group %in% c("25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "Overall Women (Weighted)")
  )

# Stacked bars per age group, one facet per gender
p15 <- ggplot(
  career_age_filtered,
  aes(age_group, weighted_percent, fill = career_asp_clean)
)
p15 <- p15 +
  geom_col(position = "stack", alpha = 0.8) +
  facet_wrap(~gender, scales = "free_x")
p15 <- p15 +
  scale_fill_viridis_d(name = "Career Aspiration", option = "plasma") +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.05))
  )
p15 <- p15 +
  labs(
    title = "2025 Career Aspirations by Age Group and Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Stacked view of aspirations",
    x = "Age Group",
    y = "Percentage of Respondents",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p15 <- p15 +
  theme_compensation +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    strip.text = element_text(size = 12, face = "bold")
  )
print(p15)

# Plot 16: Current Position by Gender ----
cat("Creating Plot 16: Current Position by Gender\n")
## Creating Plot 16: Current Position by Gender

# Percentage of each answer to m3A per gender; women age-weighted
current_pos_data <- apply_women_weighting(df_2025, "m3A")

# Clean up current position categories and filter out non-employed. Several
# raw answers can map to the same display label, so the percentages are
# summed per label and gender afterwards; otherwise each label could have
# several overlapping bars.
current_pos_filtered <- current_pos_data %>%
  filter(!response %in% c("Unemployed", "Retired") & !is.na(response)) %>%
  mutate(
    response = as.character(response),
    response = case_when(
      str_detect(response, "C-Suite") ~ "C-Suite",
      str_detect(response, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
      str_detect(response, "Senior level") ~ "Senior Level",
      str_detect(response, "Mid-level|Associate") ~ "Mid-Level/Associate",
      str_detect(response, "Entry level") ~ "Entry Level",
      str_detect(response, "Self-employed|Independent") ~ "Self-Employed",
      TRUE ~ response
    )
  ) %>%
  group_by(response, gender) %>%
  summarise(weighted_percent = sum(weighted_percent), .groups = "drop")

# Order positions hierarchically. Labels kept by the `TRUE ~ response`
# fallback are appended after the known order; with levels = position_order
# alone they would become NA and be plotted under an "NA" tick.
position_order <- c("Entry Level", "Mid-Level/Associate", "Senior Level",
                    "VP/SVP/MD/Partner", "C-Suite", "Self-Employed")
current_pos_filtered <- current_pos_filtered %>%
  mutate(
    response = factor(response, levels = union(position_order, unique(response)))
  )

p16 <- ggplot(
  current_pos_filtered,
  aes(x = response, y = weighted_percent, fill = gender)
) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    vjust = -0.3, size = 4, fontface = "bold"
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "2025 Current Position Distribution by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Career level representation",
    x = "Current Position Level",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
print(p16)

# Plot 17: Success Level Self-Assessment ----
cat("Creating Plot 17: Success Level Self-Assessment\n")
## Creating Plot 17: Success Level Self-Assessment

# Mean of the scale item m15[SQ001] per gender; women age-weighted. The
# numeric helper returns the mean in its `mean_salary` column.
success_data <- apply_compensation_weighting(df_2025, "m15[SQ001]")

# One bar per gender on a fixed 0-10 axis, mean rounded to one decimal
p17 <- ggplot(success_data, aes(gender, mean_salary, fill = gender))
p17 <- p17 +
  geom_col(width = 0.6, alpha = 0.8) +
  geom_text(
    aes(label = round(mean_salary, 1)),
    vjust = -0.5, size = 5, fontface = "bold"
  )
p17 <- p17 +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    limits = c(0, 10),
    expand = expansion(mult = c(0, 0.15))
  )
p17 <- p17 +
  labs(
    title = "2025 Career Success Self-Assessment by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Scale: 1 (Low) - 10 (High)",
    x = "Gender",
    y = "Mean Success Level (1-10 Scale)",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  )
p17 <- p17 + theme_compensation + guides(fill = "none")
print(p17)

Plot 18: Satisfaction with Career Success
cat("Creating Plot 18: Satisfaction with Career Success\n")
## Creating Plot 18: Satisfaction with Career Success

# Per-gender summary of survey item m16[SQ001] (satisfaction with career
# success).
# NOTE(review): the value column is named `mean_salary` by the helper even
# though it holds a 1-10 rating here -- confirm against
# apply_compensation_weighting().
career_sat_data <- apply_compensation_weighting(df_2025, "m16[SQ001]")

p18 <- ggplot(
  data = career_sat_data,
  mapping = aes(x = gender, y = mean_salary, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.6) +
  geom_text(
    mapping = aes(label = round(mean_salary, 1)),
    fontface = "bold",
    size = 5,
    vjust = -0.5
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    # Fixed 0-10 axis so the rating plots share one scale.
    limits = c(0, 10),
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "2025 Satisfaction with Career Success by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Scale: 1 (Not Satisfied) - 10 (Very Satisfied)",
    x = "Gender",
    y = "Mean Satisfaction Level (1-10 Scale)",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  # The x axis already names each gender, so the fill legend is redundant.
  guides(fill = "none")

print(p18)

Plot 19: Work/Life Balance Satisfaction
cat("Creating Plot 19: Work/Life Balance Satisfaction\n")
## Creating Plot 19: Work/Life Balance Satisfaction

# Per-gender summary of survey item m16[SQ002] (work/life balance
# satisfaction).
# NOTE(review): the value column is named `mean_salary` by the helper even
# though it holds a 1-10 rating here -- confirm against
# apply_compensation_weighting().
balance_data <- apply_compensation_weighting(df_2025, "m16[SQ002]")

p19 <- ggplot(
  data = balance_data,
  mapping = aes(x = gender, y = mean_salary, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.6) +
  geom_text(
    mapping = aes(label = round(mean_salary, 1)),
    fontface = "bold",
    size = 5,
    vjust = -0.5
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    # Fixed 0-10 axis so the rating plots share one scale.
    limits = c(0, 10),
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "2025 Work/Life Balance Satisfaction by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Scale: 1 (Not Satisfied) - 10 (Very Satisfied)",
    x = "Gender",
    y = "Mean Work/Life Balance Satisfaction (1-10 Scale)",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  # The x axis already names each gender, so the fill legend is redundant.
  guides(fill = "none")

print(p19)

Plot 20: Career Transitions
cat("Creating Plot 20: Career Transitions\n")
## Creating Plot 20: Career Transitions

# Distribution of survey item m4D (number of company transitions) by gender.
# The plot below reads `gender`, `response` and `weighted_percent` from the
# helper's result.
career_trans_data <- apply_women_weighting(df_2025, "m4D")

# Recode the raw answers into display buckets. Several raw answers share one
# bucket ("4"/"5" and "6".."9"), so after this step a gender can have more
# than one row per bucket.
career_trans_filtered <- career_trans_data %>%
  filter(!is.na(response)) %>%
  mutate(
    # Coerce first so the comparisons and the fallback branch of case_when()
    # all work on one type even if the helper returns a factor or number.
    response = as.character(response),
    response = case_when(
      response == "0" ~ "No Transitions",
      response == "1" ~ "1 Transition",
      response == "2" ~ "2 Transitions",
      response == "3" ~ "3 Transitions",
      response %in% c("4", "5") ~ "4-5 Transitions",
      response %in% c("6", "7", "8", "9") ~ "6-9 Transitions",
      response == "10 or more" ~ "10+ Transitions",
      TRUE ~ response
    )
  )

# Order transitions logically
transition_order <- c(
  "No Transitions", "1 Transition", "2 Transitions", "3 Transitions",
  "4-5 Transitions", "6-9 Transitions", "10+ Transitions"
)
career_trans_filtered <- career_trans_filtered %>%
  mutate(response = factor(response, levels = transition_order))

# Add up the shares of the raw answers that were merged into one bucket.
# Without this, rows sharing gender and bucket are drawn on top of each other
# by the dodged geom_col(), so the bar shows only the largest component and
# the labels overlap. Buckets backed by a single row are unchanged.
# Kept separate so career_trans_filtered retains all of its columns.
career_trans_plot <- career_trans_filtered %>%
  group_by(gender, response) %>%
  summarise(weighted_percent = sum(weighted_percent), .groups = "drop")

p20 <- ggplot(
  career_trans_plot,
  aes(x = response, y = weighted_percent, fill = gender)
) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = paste0(round(weighted_percent, 1), "%")),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.7),
    vjust = -0.3, size = 3.5, fontface = "bold"
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    # No padding below zero; 15% headroom above for the bar labels.
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "2025 Career Transitions (Company Changes) by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Job mobility patterns",
    x = "Number of Company Transitions",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
print(p20)

Plot 21: Promotions & Lateral Moves
cat("Creating Plot 21: Promotions & Lateral Moves\n")
## Creating Plot 21: Promotions & Lateral Moves

# Reshape the four m12A count columns into long form: one row per respondent
# and move type, with a readable move-type label and a numeric count.
promo_data <- df_2025 %>%
  select(
    gender, age,
    accepted_promos = `m12A[SQ001]`,
    declined_promos = `m12A[SQ002]`,
    accepted_lateral = `m12A[SQ003]`,
    declined_lateral = `m12A[SQ004]`
  ) %>%
  filter(!is.na(gender) & !is.na(age)) %>%
  pivot_longer(
    cols = c(accepted_promos, declined_promos, accepted_lateral, declined_lateral),
    names_to = "move_type", values_to = "count"
  ) %>%
  filter(!is.na(count)) %>%
  mutate(
    move_type = case_when(
      move_type == "accepted_promos" ~ "Accepted Promotions",
      move_type == "declined_promos" ~ "Declined Promotions",
      move_type == "accepted_lateral" ~ "Accepted Lateral Moves",
      move_type == "declined_lateral" ~ "Declined Lateral Moves"
    ),
    # Values that cannot be parsed become NA and are skipped by the
    # na.rm = TRUE means below.
    count = as.numeric(count)
  )

# For each move type: plain mean for men, age-weighted mean for women.
move_results <- map_dfr(unique(promo_data$move_type), function(move) {
  move_subset <- promo_data %>% filter(move_type == move)

  # Men are reported unweighted.
  men_stats <- move_subset %>%
    filter(gender == "Men") %>%
    summarise(mean_count = mean(count, na.rm = TRUE), gender = "Men")

  # Women: mean per age group, joined to the age weights. Age groups without
  # a weight are dropped.
  women_by_age <- move_subset %>%
    filter(gender == "Women") %>%
    group_by(age) %>%
    summarise(mean_count = mean(count, na.rm = TRUE), .groups = "drop") %>%
    left_join(women_age_weights, by = "age") %>%
    filter(!is.na(weight))

  # weighted.mean() divides by the sum of the weights actually used. A plain
  # sum(mean_count * weight) is only a mean when those weights add up to 1,
  # which fails as soon as an age group is absent for this move type (or has
  # an NA mean); the result would then be biased towards zero. When every
  # group is present and the weights sum to 1 the two forms agree.
  women_weighted <- women_by_age %>%
    summarise(
      mean_count = weighted.mean(mean_count, weight, na.rm = TRUE),
      gender = "Women"
    )

  bind_rows(men_stats, women_weighted) %>%
    mutate(move_type = move)
})

p21 <- ggplot(move_results, aes(x = move_type, y = mean_count, fill = gender)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(
    aes(label = round(mean_count, 1)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.7),
    vjust = -0.3, size = 3.5, fontface = "bold"
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "2025 Career Moves: Promotions & Lateral Moves by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Mean number of moves",
    x = "Type of Career Move",
    y = "Mean Number of Moves",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
print(p21)

Plot 22: Job Change Motivations
cat("Creating Plot 22: Job Change Motivations\n")
## Creating Plot 22: Job Change Motivations

# Survey column -> label shown on the chart, one entry per m12C motivation
# item that is plotted.
motivation_labels <- c(
  "m12C[SQ001]" = "Looking for Change",
  "m12C[SQ002]" = "Greater Advancement",
  "m12C[SQ003]" = "Flexible Schedule",
  "m12C[SQ004]" = "Better Compensation",
  "m12C[SQ005]" = "Lost Prior Job",
  "m12C[SQ006]" = "Gain Experience",
  "m12C[SQ008]" = "Other Reasons"
)
# The columns to process are exactly the keys of the label lookup.
motivation_cols <- names(motivation_labels)

# Stack the "Yes" rows of every motivation item into one long table.
motivation_results <- map_dfr(motivation_cols, function(col) {
  # Items that are not present in df_2025 contribute no rows.
  if (!(col %in% colnames(df_2025))) {
    return(NULL)
  }
  weighted <- apply_boolean_weighting(df_2025, col)
  weighted$motivation <- motivation_labels[col]
  filter(weighted, response == "Yes")
})

p22 <- ggplot(
  data = motivation_results,
  mapping = aes(
    # Sort the motivations by their weighted percentage.
    x = reorder(motivation, weighted_percent),
    y = weighted_percent,
    fill = gender
  )
) +
  geom_col(position = "dodge", width = 0.7, alpha = 0.8) +
  geom_text(
    mapping = aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    # Bars are horizontal after coord_flip(), so nudge labels to the right.
    hjust = -0.1,
    size = 3.5
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(pct) paste0(pct, "%"),
    expand = expansion(mult = c(0, 0.15))
  ) +
  coord_flip() +
  labs(
    title = "2025 Job Change Motivations by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | % who selected each motivation",
    x = "Motivation for Job Change",
    y = "Percentage Who Selected This Motivation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.y = element_text(size = 9))

print(p22)

Executive Summary
cat("\n=== EXECUTIVE SUMMARY ===\n")
##
## === EXECUTIVE SUMMARY ===

# Pay gaps: how far women's figure falls below men's, as a percentage of
# men's figure, rounded to one decimal.
base_salary_gap <- round(100 * (1 - women_salary / men_salary), 1)
total_comp_gap <- round(100 * (1 - women_total / men_total), 1)

# Self-assessed success (m15[SQ001]).
# NOTE(review): the helper names its value column `mean_salary` regardless of
# the item; it is read here as the 1-10 rating.
success_levels <- apply_compensation_weighting(df_2025, "m15[SQ001]")
women_success <- pull(filter(success_levels, gender == "Women"), mean_salary)
men_success <- pull(filter(success_levels, gender == "Men"), mean_salary)

# Satisfaction with career success (m16[SQ001]).
career_satisfaction <- apply_compensation_weighting(df_2025, "m16[SQ001]")
women_career_sat <- pull(filter(career_satisfaction, gender == "Women"), mean_salary)
men_career_sat <- pull(filter(career_satisfaction, gender == "Men"), mean_salary)

# Work/life balance satisfaction (m16[SQ002]).
worklife_balance <- apply_compensation_weighting(df_2025, "m16[SQ002]")
women_balance <- pull(filter(worklife_balance, gender == "Women"), mean_salary)
men_balance <- pull(filter(worklife_balance, gender == "Men"), mean_salary)

# Assemble the two-column summary. The gap rows carry a "%" suffix, so the
# whole Value column ends up as character.
gap_values <- paste0(c(base_salary_gap, total_comp_gap), "%")
rating_values <- round(
  c(
    women_success, men_success,
    women_career_sat, men_career_sat,
    women_balance, men_balance
  ),
  1
)
summary_data <- data.frame(
  Metric = c(
    "Base Salary Gap (%)", "Total Compensation Gap (%)",
    "Success Level (Women)", "Success Level (Men)",
    "Career Satisfaction (Women)", "Career Satisfaction (Men)",
    "Work-Life Balance (Women)", "Work-Life Balance (Men)"
  ),
  Value = c(gap_values, rating_values)
)

# Render the table: bold first column, white-on-blue header row.
findings_table <- kable(
  summary_data,
  caption = "Key Findings Summary",
  col.names = c("Key Metric", "Value"),
  align = c("l", "c")
)
findings_table <- kable_styling(
  findings_table,
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE,
  position = "center"
)
findings_table <- column_spec(findings_table, 1, bold = TRUE)
findings_table <- row_spec(
  findings_table, 0,
  bold = TRUE, color = "white", background = "#3498db"
)
findings_table
Key Findings Summary

| Key Metric | Value |
|:---|:---:|
| Base Salary Gap (%) | 19.9% |
| Total Compensation Gap (%) | 42.8% |
| Success Level (Women) | 7.5 |
| Success Level (Men) | 7.6 |
| Career Satisfaction (Women) | 7.4 |
| Career Satisfaction (Men) | 7.4 |
| Work-Life Balance (Women) | 7.2 |
| Work-Life Balance (Men) | 7.4 |
cat("\n=== KEY INSIGHTS ===\n")

# One entry per bullet; each entry is the argument list handed to cat(), so
# the pieces are joined with cat()'s default single-space separator.
key_insights <- list(
  list("• Base salary gap:", base_salary_gap, "%\n"),
  list("• Total compensation gap:", total_comp_gap, "%\n"),
  list("• Women's self-assessed success level:", round(women_success, 1), "/10\n"),
  list("• Women's career satisfaction:", round(women_career_sat, 1), "/10\n"),
  list("• Women's work-life balance satisfaction:", round(women_balance, 1), "/10\n")
)
invisible(lapply(key_insights, function(parts) do.call(cat, parts)))

# Console output recorded in the rendered report:
##
## === KEY INSIGHTS ===
## • Base salary gap: 19.9 %
## • Total compensation gap: 42.8 %
## • Women's self-assessed success level: 7.5 /10
## • Women's career satisfaction: 7.4 /10
## • Women's work-life balance satisfaction: 7.2 /10